## INTRODUCTION
To analyze the dataset, the following packages are required: ggplot2, dplyr and magrittr (cowplot is also used later, through `cowplot::plot_grid`, to arrange plots). We then import the dataset, which covers the Covid-19 pandemic from 31/12/2019 to 14/12/2020 and has 12 variables: date, day, month, year, cases, deaths, country, code (of the country), population, continent, cases_cum (cumulative cases) and deaths_cum (cumulative deaths).
# Echo the source code of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)

# Plotting (ggplot2), data manipulation (dplyr) and the %>% pipe (magrittr).
library(ggplot2)
library(dplyr)
library(magrittr)

# Load the semicolon-separated Covid-19 dataset.
# NOTE(review): this is an absolute, machine-specific path; the report only
# knits where this exact file exists. Consider a project-relative path.
df <- read.delim(file = "/Users/martina/Desktop/covid.csv", sep = ";")
This analysis focuses on the registered deaths and cases of Covid-19 in five continents: Europe, Asia, Africa, Oceania and America. Since the data form a time series, the “date” variable must be converted from text to the Date class so that the observations are ordered and plotted correctly.
# Build `ord_df`: a copy of `df` whose `date` column is a Date (parsed from
# day/month/year text) and whose rows are sorted chronologically.
# The text is parsed once and the parsed column is reused for the ordering.
ord_df <- df
ord_df$date <- as.Date(ord_df$date, format = "%d/%m/%Y")
ord_df <- ord_df[order(ord_df$date), ]

# Quick check of the covered period; an NA here means that at least one date
# could not be parsed with the "%d/%m/%Y" format.
range(ord_df$date)
## Cases of Covid-19
The visualization starts at the continent level and then focuses on the most relevant and most affected countries.
# Daily cases in Asia: date on the x axis, cases on the y axis,
# one coloured series (points joined by a line) per country.
ggplot(
  data = filter(ord_df, continent == "Asia"),
  mapping = aes(x = date, y = cases, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
# Daily cases for China and India.
# `%in%` keeps every row of both countries. `country == c("China", "India")`
# would recycle the two names along the rows (row 1 vs "China", row 2 vs
# "India", ...) and silently drop roughly half of the observations.
c <- ord_df %>%
  filter(country %in% c("China", "India")) %>%
  ggplot(aes(x = date, y = cases, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
c
# Daily cases in Europe, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Europe"),
  mapping = aes(x = date, y = cases, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
# Daily cases for Italy, France and Spain.
# `%in%` keeps every row of the three countries. Comparing with `==` against
# a length-3 vector would recycle it along the rows and keep only about one
# third of the matching observations.
e <- ord_df %>%
  filter(country %in% c("Italy", "France", "Spain")) %>%
  ggplot(aes(x = date, y = cases, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
e
# Daily cases in Africa, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Africa"),
  mapping = aes(x = date, y = cases, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
# Daily cases for South Africa and Morocco.
# `%in%` keeps every row of both countries; `==` against a length-2 vector
# would recycle it along the rows and drop roughly half of the observations.
a <- ord_df %>%
  filter(country %in% c("South_Africa", "Morocco")) %>%
  ggplot(aes(x = date, y = cases, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
a
# Daily cases in America, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "America"),
  mapping = aes(x = date, y = cases, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
# Daily cases for the United States of America and Brazil.
# `%in%` keeps every row of both countries; `==` against a length-2 vector
# would recycle it along the rows and drop roughly half of the observations.
us <- ord_df %>%
  filter(country %in% c("United_States_of_America", "Brazil")) %>%
  ggplot(aes(x = date, y = cases, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
us
# Daily cases in Oceania, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Oceania"),
  mapping = aes(x = date, y = cases, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
# Daily cases for French Polynesia and Australia.
# `%in%` keeps every row of both countries; `==` against a length-2 vector
# would recycle it along the rows and drop roughly half of the observations.
o <- ord_df %>%
  filter(country %in% c("French_Polynesia", "Australia")) %>%
  ggplot(aes(x = date, y = cases, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
o
## Deaths
The following plots show the number of deaths at the continent and country level. Note that one of the observations of deaths for Spain is negative, which indicates an error in the dataset.
# Daily deaths in Asia: date on the x axis, deaths on the y axis,
# one coloured series (points joined by a line) per country.
ggplot(
  data = filter(ord_df, continent == "Asia"),
  mapping = aes(x = date, y = deaths, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
# Daily deaths for China and India.
# `%in%` keeps every row of both countries. `country == c("China", "India")`
# would recycle the two names along the rows and silently drop roughly half
# of the observations.
c1 <- ord_df %>%
  filter(country %in% c("China", "India")) %>%
  ggplot(aes(x = date, y = deaths, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
c1
# Daily deaths in Europe, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Europe"),
  mapping = aes(x = date, y = deaths, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
# Daily deaths for Italy, France and Spain.
# `%in%` keeps every row of the three countries. Comparing with `==` against
# a length-3 vector would recycle it along the rows and keep only about one
# third of the matching observations.
e1 <- ord_df %>%
  filter(country %in% c("Italy", "France", "Spain")) %>%
  ggplot(aes(x = date, y = deaths, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
e1
# Daily deaths in Africa, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Africa"),
  mapping = aes(x = date, y = deaths, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
# Daily deaths for South Africa and Morocco.
# `%in%` keeps every row of both countries; `==` against a length-2 vector
# would recycle it along the rows and drop roughly half of the observations.
a1 <- ord_df %>%
  filter(country %in% c("South_Africa", "Morocco")) %>%
  ggplot(aes(x = date, y = deaths, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
a1
# Daily deaths in America, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "America"),
  mapping = aes(x = date, y = deaths, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
# Daily deaths for the United States of America and Brazil.
# `%in%` keeps every row of both countries; `==` against a length-2 vector
# would recycle it along the rows and drop roughly half of the observations.
us1 <- ord_df %>%
  filter(country %in% c("United_States_of_America", "Brazil")) %>%
  ggplot(aes(x = date, y = deaths, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
us1
# Daily deaths in Oceania, one coloured series per country.
ggplot(
  data = filter(ord_df, continent == "Oceania"),
  mapping = aes(x = date, y = deaths, color = country)
) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
# Daily deaths for French Polynesia and Australia.
# `%in%` keeps every row of both countries; `==` against a length-2 vector
# would recycle it along the rows and drop roughly half of the observations.
o1 <- ord_df %>%
  filter(country %in% c("French_Polynesia", "Australia")) %>%
  ggplot(aes(x = date, y = deaths, color = country)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(legend.position = "bottom")
o1
### COMPARISON OF DEATHS AND CASES BY COUNTRY
The following plots compare deaths and cases in the most affected countries. In each figure, the top panel shows daily cases and the bottom panel shows daily deaths.
# For each group of countries, stack the cases plot (first argument, top
# panel) above the matching deaths plot (second argument, bottom panel) in a
# grid of two rows and one column.
cowplot::plot_grid(c, c1, nrow = 2, ncol = 1)   # China, India
cowplot::plot_grid(o, o1, nrow = 2, ncol = 1)   # French Polynesia, Australia
cowplot::plot_grid(a, a1, nrow = 2, ncol = 1)   # South Africa, Morocco
cowplot::plot_grid(us, us1, nrow = 2, ncol = 1) # United States, Brazil
cowplot::plot_grid(e, e1, nrow = 2, ncol = 1)   # Italy, France, Spain